In [ ]:
from __future__ import division
import codecs
import pickle
import networkx as nx
from collections import Counter
from matplotlib import rcParams

rcParams['figure.figsize'] = (12.0, 10.0)

In [ ]:
from os.path import abspath, expanduser

# workspace: the parent of the current directory, i.e. the repository root.
workspace = "/".join(abspath('.').split('/')[:-1])
# home: assumed to be the user's home directory, under which the
# ground-truth pages file used below lives.
home = expanduser('~')

Note: make sure that workspace points to the root directory of openie_eval.
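
As a quick sanity check (a sketch; it assumes the repository root contains the openie_eval package directory, which the imports in the next cell rely on):

In [ ]:
import os
# Fail fast if workspace does not look like the openie_eval repo root.
assert os.path.isdir(os.path.join(workspace, 'openie_eval')), workspace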


In [ ]:
from openie_eval.openie_eval import semantic_parsing as sp
from openie_eval.openie_eval import ontologization
# Reload so that local edits to the modules are picked up without
# restarting the kernel.
reload(sp)
reload(ontologization)

from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
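
For reference, WordNet's lemmatizer defaults to treating words as nouns; the verb part of speech must be passed explicitly for it to collapse verb inflections, which is why the relation phrases are lemmatized with pos='v' further down.

In [ ]:
# Default pos is 'n', which leaves verb inflections untouched.
print lemmatizer.lemmatize('composed')           # composed
print lemmatizer.lemmatize('composed', pos='v')  # compose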

In [ ]:
keyword = 'hindustani_music'

# Ground-truth Wikipedia page titles for the style, one per line.
with codecs.open(home + '/workspace/nerpari/data/ground-truth/' + keyword + '_pages.txt',
                 encoding='utf-8') as f:
    wiki_entities = [i.strip().lower() for i in f]

methods = ['reverb', 'openie', 'semantic-parsing']
labels = {'reverb': 'ReVerb', 'openie': 'OpenIE 4.0', 'semantic-parsing': 'Sem. Parsing'}
colors = ['#990033', '#006600', '#330066']

# Use coreference-resolved relations (set to '' for the raw extractions).
#coref_suffix = ''
coref_suffix = '-coref'

# Use the filtered relations (set to '' for the unfiltered ones).
#filtered_suffix = ''
filtered_suffix = '-filtered'
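
The two suffixes select one of four precomputed relation files per method; with the settings above, the loop below reads relations-coref-filtered.pickle:

In [ ]:
# Filename variant selected by the suffixes above.
print 'relations' + coref_suffix + filtered_suffix + '.pickle'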

In [ ]:
# The rules and the ground truth do not depend on the method; load them once.
rules = pickle.load(open(workspace + '/data/results/qualitative/object-identification/rule-based/' + keyword + '/rules.pickle', 'rb'))
groundtruth = ontologization.load_groundtruth(keyword, rules.keys())

top_100 = []
for method in methods:
    relations = pickle.load(open(workspace + '/data/' + method + '/' + keyword + '/relations' + coref_suffix + filtered_suffix + '.pickle', 'rb'))
    # Normalize each triple: lowercase the arguments, lemmatize the relation verb.
    relations = [[i['arg1'].lower(), lemmatizer.lemmatize(i['rel'].lower(), pos='v'), i['arg2'].lower()]
                 for i in relations]

    # Rank the extracted objects (arg2) by frequency; mask rare ones
    # (count <= 5) among the top 100 with '--'.
    concepts = [i[2] for i in relations]
    c = Counter(concepts)
    top_100.append([i[0] if i[1] > 5 else '--' for i in c.most_common(100)])
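
A minimal sketch of the masking step on toy data (the words are made up): objects with count <= 5 are replaced with '--' rather than dropped, so the per-method columns printed below stay aligned by rank.

In [ ]:
from collections import Counter
toy = Counter({'raga': 6, 'tabla': 2})
# Keep frequent items, blank out rare ones in place.
print [w if n > 5 else '--' for w, n in toy.most_common(100)]
# ['raga', '--']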

In [ ]:
# Print the three methods' top objects side by side, tab-separated.
for row in zip(top_100[0], top_100[1], top_100[2]):
    print "\t".join(row)